package uf.tax.laws.spider.sd;

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import uf.tax.laws.spider.HttpDAO;

/**
 * Fetches the list of categories.
 * 
 * @author sunny
 *
 */
public class GetCatList {
	/** Address of the page that is downloaded and scanned for category links. */
	private static final String CATEGORY_LIST_URL = "http://www.taxad.cn/news/newscat.aspx?catid=2";

	/**
	 * Matches a single category anchor. Group 1 captures the numeric
	 * {@code catid} query value, group 2 captures the text between the
	 * anchor tags. {@link Pattern#DOTALL} lets the lazy text group span line
	 * breaks. Compiled once because the expression never changes.
	 */
	private static final Pattern CATEGORY_LINK = Pattern.compile(
			"<a href=\"/news/newstsort.aspx\\?catid=(\\d+)\">(.*?)</a></li>",
			Pattern.DOTALL);

	/** Collaborator used to download the category page. */
	private final HttpDAO http;

	/**
	 * Creates a category fetcher.
	 * 
	 * @param http
	 *            object whose {@code get(String)} method is used to download
	 *            the category page
	 */
	public GetCatList(HttpDAO http) {
		this.http = http;
	}

	/**
	 * Downloads the category page and extracts every category link found in
	 * it.
	 * 
	 * @return a new mutable map from the anchor text (group 2 of the link
	 *         pattern) to the {@code catid} digits (group 1); if two links
	 *         share the same text, the later one replaces the earlier one.
	 *         The map is empty when no link matches or when the download
	 *         yields {@code null}.
	 */
	public Map<String, String> getLists() {
		String listContent = http.get(CATEGORY_LIST_URL);
		Map<String, String> resultMap = new HashMap<String, String>();
		if (listContent == null) {
			// NOTE(review): whether HttpDAO.get can return null is not
			// visible from this file; the guard avoids the
			// NullPointerException Pattern.matcher(null) would throw.
			return resultMap;
		}
		Matcher matcher = CATEGORY_LINK.matcher(listContent);
		while (matcher.find()) {
			// Keyed by the displayed text, valued by the category id.
			resultMap.put(matcher.group(2), matcher.group(1));
		}
		return resultMap;
	}

}
